# ps = "~/Documents/GitHub/amchick/data/raw/rgi_CARD/sqm_rgi_chicken.txt"
#
# ps %>%
# # here::here() %>%
# read_tsv() -> df
#
# df %>%
# # head() %>%
# DT::datatable()
# ```
# ```{r}
# ps = "~/Documents/GitHub/amchick/data/raw/rgi_CARD/sqm_rgi_chicken.txt"
#
# ps %>%
# # here::here() %>%
# read_tsv() -> df
#
# df %>%
# head() %>%
# DT::datatable()
# ```
ps = "~/Documents/GitHub/amchick/data/raw/metabarcoding/merged_chicken_human_04.08.2021.tsv"
ps %>%
# here::here() %>%
read_tsv() %>%
filter(metagenomic_sample_name %!in% NA) -> meta
## Rows: 600 Columns: 61
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (19): sample, Sample_description, I7_Index_ID, index, I5_Index_ID, index...
## dbl (42): input, filtered, denoisedF, denoisedR, merged, tabled, filtered_pc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
meta %>%
head() %>%
DT::datatable()
meta
## # A tibble: 43 × 61
## sample input filtered denoisedF denoisedR merged tabled filtered_pc
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CR-15-S292 23558 23449 23410 23355 22970 22970 0.995
## 2 CR-17-S97 35320 35021 34992 34870 34419 34419 0.992
## 3 CR-19-S341 17616 17528 17501 17477 17246 17246 0.995
## 4 CR-21-S126 5791 5764 5753 5734 5585 5585 0.995
## 5 CR-46-S191 24284 24178 24128 24062 23531 23531 0.996
## 6 CR-64-S324 10847 10801 10784 10726 10465 10465 0.996
## 7 D-0-S154 11484 11428 11279 11256 10248 10248 0.995
## 8 TR1-15-S168 153 151 144 115 79 79 0.987
## 9 TR1-17-S246 16895 16833 16807 16749 16429 16429 0.996
## 10 TR1-19-S305 9274 9256 9239 9203 8959 8959 0.998
## # … with 33 more rows, and 53 more variables: denoisedF_pc <dbl>,
## # denoisedR_pc <dbl>, merged_pc <dbl>, filtered_merged_pc <dbl>,
## # input_merged_pc <dbl>, tabled_joined <dbl>, chimera_out <dbl>,
## # length_filtered <dbl>, tabled_pc <dbl>, chimera_out_pc <dbl>,
## # length_filtered_pc <dbl>, Sample_description <chr>, I7_Index_ID <chr>,
## # index <chr>, I5_Index_ID <chr>, index2 <chr>, Description2 <chr>,
## # Experiment <chr>, Reactor <chr>, Treatment <chr>, …
type = c("protein homolog model")
# df %>%
# filter(Model_type %in% type,
# Cut_Off == "Strict",
# Nudged %!in% c("TRUE"),
# Best_Identities > 95, # plot disteibution of bestID vs percentage length ref and color bitscore and symbol class AMR
# Percentage.Length.of.Reference.Sequence > 80) %>%
# select(ORF_ID,
# Best_Hit_ARO,
# Model_type,
# Drug.Class,
# Resistance.Mechanism,
# AMR.Gene.Family,
# Best_Identities,
# Percentage.Length.of.Reference.Sequence,
# Note,
# "Length AA",
# "Gene name",
# "Contig ID",
# starts_with("TPM"),
# ) -> fil_df
#
# fil_df %>%
# head() %>%
# DT::datatable()
# fil_df %>%
# write_tsv(here::here("~/Projects/ETH/Alessia/mobilome/rgi_card_table.tsv"))
"~/Documents/GitHub/amchick/tmp/rgi_card_table.tsv" %>%
read_tsv() %>%
separate(ORF_ID, into = c("cont", "cont_num", "orf"), sep = "_", remove = FALSE) %>%
mutate(cont_id = paste0(cont,"_", cont_num)) %>%
select(cont_id, everything()) -> fil_df_fil
## Rows: 108 Columns: 55
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): ORF_ID, Best_Hit_ARO, Model_type, Drug.Class, Resistance.Mechanism...
## dbl (46): Best_Identities, Percentage.Length.of.Reference.Sequence, Length A...
## lgl (1): Note
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fil_df_fil %>%
head() %>%
DT::datatable()
"/Users/fconstan/Projects/ETH/Alessia/mobilome/chicken/map_megahit_SQM.csv" %>%
read_csv() -> mapping
mapping %>%
head() %>%
DT::datatable()
"/Users/fconstan/Projects/ETH/Alessia/mobilome/chicken/01.SqueezeChicken.fasta.tsv" %>%
read_tsv() -> isescan
isescan %>%
head() %>%
DT::datatable()
isescan %>%
group_by(seqID) %>%
# add_count(family) %>%
# arrange(-n) %>%
top_n(1, wt = `E-value4copy`) %>%
top_n(1, wt = isBegin) -> isescan_top
"/Users/fconstan/Projects/ETH/Alessia/anvio_chicken_SM_last/DAS-collection.txt.txt" %>%
read_tsv(col_names = c("split", "bin")) %>%
separate(split, into = c("cont", "cont_num", "split", "split_id"), sep = "_", remove = FALSE) %>%
mutate(cont_id = paste0(cont,"_", cont_num)) %>%
distinct(cont_id, .keep_all = TRUE) %>%
select(cont_id, bin) -> DAS
DAS %>%
head %>%
DT::datatable()
"/Users/fconstan/Projects/ETH/Alessia/anvio_chicken_SM_last/DAS-collection-HQ-MAGs.txt" %>%
read_tsv(col_names = c("split", "binHQ")) %>%
separate(split, into = c("cont", "cont_num", "split", "split_id"), sep = "_", remove = FALSE) %>%
mutate(cont_id = paste0(cont,"_", cont_num)) %>%
distinct(cont_id, .keep_all = TRUE) %>%
select(cont_id, binHQ) -> DASHQ
"/Users/fconstan/Projects/ETH/Alessia/anvio_chicken_SM_last/MAGs_additional_data.tsv" %>%
read_tsv() -> MAG_tax
"/Users/fconstan/Documents/GitHub/chicken/src/12_Figures/Resistome/resistome_phyloseq.rds" %>%
readRDS() -> physeq
sample_data(physeq)$Reactor_Treatment <- fct_relevel(sample_data(physeq)$Reactor_Treatment, "DONOR", "CR_UNTREATED", "TR1_CTX+HV292.1",
"TR2_CTX","TR3_HV292.1", "TR4_VAN", "TR5_VAN+CCUG59168", "TR6_CCUG59168")
sample_data(physeq)$Treatment <- fct_relevel(sample_data(physeq)$Treatment, "DONOR", "UNTREATED", "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN", "CCUG59168")
Create ARG type abundance plots:
physeq_drug <- speedyseq::tax_glom(physeq, taxrank = "Drug.Class")
#define colors
set.seed(123)
n <- phyloseq::ntaxa(physeq_drug)
col <- randomcoloR::distinctColorPalette(n)
physeq_drug %>%
phyloseq::plot_bar(x = "Day_of_Treatment", fill = "Drug.Class") +
scale_fill_manual(values=col) +
# geom_bar(stat="identity",colour=NA,size=0, position = "fill") +
# geom_bar(stat="identity",colour=NA,size=0, position = "fill") +
facet_grid(~ Reactor_Treatment, scales = "free", space = "free") +
ggtitle("ARG Type Abundance per Sample") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_light() +
xlab("Day of Treatment") +
ylab("TPM")
ARG Alpha Diversity
# extract ARG abundance info from phyloseq object
physeq %>%
otu_table() %>%
as.data.frame() %>%
t() -> resistome_copy
# extract sample info
physeq %>%
sample_data() %>%
as.data.frame() -> treatment_info
# add sample info to resistome abundance data and get richeness
resistome_copy %>%
specnumber() %>%
cbind(., treatment_info) %>%
filter(., Treatment != "DONOR")-> ARG_richeness
levels(ARG_richeness$Day_of_Treatment)[ARG_richeness$Day_of_Treatment] %>%
as.numeric() -> ARG_richeness$Day_of_Treatment
colnames(ARG_richeness)[1] <- "Richeness"
# plot
ggplot(ARG_richeness, aes(x = Reactor_Treatment, y = Richeness)) +
geom_boxplot(aes(color=Reactor_Treatment),
outlier.shape = NA,
outlier.colour = NA,
outlier.size = 0) +
geom_jitter(aes(color=Reactor_Treatment), alpha = 0.4) +
theme(legend.position="none") +
xlab("Treatment") +
ylab("ARG Richness") +
theme_light() + scale_color_viridis_d(na.value = "black")
# plot panels for each treatment
ggplot(ARG_richeness, aes(x = Day_of_Treatment, y = Richeness, color = Reactor_Treatment)) +
geom_line(linetype = "dashed", size = 0.75) +
geom_point() +
labs(title = "AMR Gene Richness",
y = "Richness", x = "Days of Treatment") +
theme_light() + scale_color_viridis_d(na.value = "black")
Beta Diversity:
sample_data(physeq)$Reactor_Treatment <- fct_relevel(sample_data(physeq)$Reactor_Treatment, "DONOR", "CR_UNTREATED", "TR1_CTX+HV292.1",
"TR2_CTX","TR3_HV292.1", "TR4_VAN", "TR5_VAN+CCUG59168", "TR6_CCUG59168")
sample_data(physeq)$Treatment <- fct_relevel(sample_data(physeq)$Treatment, "DONOR", "UNTREATED", "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN", "CCUG59168")
# physeq %>%
# rarefy_even_depth(sample.size = 43,
# rngseed = 123) -> phyloseq_rare
physeq %>%
phyloseq_compute_bdiv(norm = "pc",
phylo = FALSE,
seed = 123) -> bdiv_list
## Loading required package: ape
## Loading required package: GUniFrac
## Registered S3 method overwritten by 'rmutil':
## method from
## print.response httr
physeq %>%
subset_samples(Treatment != "DONOR") %>%
phyloseq_plot_bdiv(dlist = bdiv_list,
seed = 123,
axis1 = 1,
axis2 = 2) -> pcoa
## [1] "bray"
## [1] "sorensen"
## [1] "bjaccard"
## [1] "wjaccard"
# phyloseq_plot_bdiv(bdiv_list,
# # m = "CoDa",
# seed = 123) -> coda
#
pcoa$wjaccard$layers = NULL
pcoa$wjaccard + geom_point(size=2,
aes(color = Reactor_Treatment,
fill = Reactor_Treatment,
shape = NULL,
alpha = Day_of_Treatment)) +
theme_light() +
geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.15, "inches")),
size = 0.05, linetype = "dashed", inherit.aes = TRUE, aes(group=Reactor_Treatment, color = Reactor_Treatment), show.legend = FALSE) +
#scale_alpha_continuous(range=c( 0.9, 0.3)) +
scale_color_viridis_d(na.value = "black") +
scale_fill_viridis_d(na.value = "black") +
# scale_shape_manual(values = c(8, 21, 22, 23, 24, 16, 15, 18, 17)) +
theme_classic() +
labs(col=NULL, fill = NULL, shape = NULL) + guides(shape=FALSE) -> p1
p1
physeq %>%
tax_table() %>%
data.frame() %>%
rownames_to_column("id") %>%
mutate(gene = id) %>%
column_to_rownames("id") %>%
as.matrix %>%
tax_table() -> tax_table(physeq)
physeq %>%
subset_samples(Treatment != "DONOR") %>%
phyloseq_add_taxa_vector(dist = bdiv_list$wjaccard,
phyloseq = .,
figure_ord = p1,
tax_rank_plot = "Drug.Class", taxrank_glom = "Drug.Class",
top_r = 10, fact = 0.6) -> pco_env
p1
pco_env$plot
pco_env$signenvfit %>%
DT::datatable()
# plot.df.sep %>%
# select(sample, TPM, Best_Hit_ARO) %>%
# pivot_wider(names_from = c("Best_Hit_ARO"), values_from = TPM) -> ready
# t() %>%
# pheatmap::pheatmap(clustering_distance_cols= "euclidean",
# cluster_distance_rows = 'pearson',
# annotation_col = full_data %>% column_to_rownames(sample_id_column) %>% dplyr::select(all_of(pca_group)),
# cutree_cols = cutree_cols,
# cutree_rows = cutree_rows,
# fontsize = 8) -> heatmap
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] GUniFrac_1.4 ape_5.5 vegan_2.5-7
## [4] lattice_0.20-45 permute_0.9-5 phyloseq_1.36.0
## [7] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [10] purrr_0.3.4 readr_2.1.0 tidyr_1.1.4
## [13] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1.9000
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.4.0 plyr_1.8.6
## [4] igraph_1.2.6 splines_4.1.2 crosstalk_1.1.1
## [7] GenomeInfoDb_1.28.4 digest_0.6.29 foreach_1.5.1
## [10] htmltools_0.5.2 lmerTest_3.1-3 fansi_0.5.0
## [13] magrittr_2.0.1 cluster_2.1.2 tzdb_0.2.0
## [16] openxlsx_4.2.4 Biostrings_2.60.2 modelr_0.1.8
## [19] matrixStats_0.61.0 stabledist_0.7-1 vroom_1.5.6
## [22] colorspace_2.0-2 rvest_1.0.2 ggrepel_0.9.1
## [25] haven_2.4.3 xfun_0.28 crayon_1.4.2
## [28] RCurl_1.98-1.5 jsonlite_1.7.2 lme4_1.1-27.1
## [31] survival_3.2-13 iterators_1.0.13 glue_1.5.1
## [34] gtable_0.3.0 zlibbioc_1.38.0 XVector_0.32.0
## [37] V8_3.6.0 car_3.0-11 Rhdf5lib_1.14.2
## [40] BiocGenerics_0.38.0 abind_1.4-5 scales_1.1.1
## [43] DBI_1.1.1 randomcoloR_1.1.0.1 rstatix_0.7.0
## [46] Rcpp_1.0.7 viridisLite_0.4.0 clue_0.3-60
## [49] foreign_0.8-81 bit_4.0.4 stats4_4.1.2
## [52] DT_0.19 timeSeries_3062.100 htmlwidgets_1.5.4
## [55] httr_1.4.2 ellipsis_0.3.2 spatial_7.3-14
## [58] pkgconfig_2.0.3 farver_2.1.0 sass_0.4.0
## [61] dbplyr_2.1.1 utf8_1.2.2 tidyselect_1.1.1
## [64] labeling_0.4.2 rlang_0.4.12 reshape2_1.4.4
## [67] munsell_0.5.0 cellranger_1.1.0 tools_4.1.2
## [70] cli_3.1.0 generics_0.1.1 ade4_1.7-18
## [73] broom_0.7.10 evaluate_0.14 biomformat_1.20.0
## [76] fastmap_1.1.0 yaml_2.2.1 knitr_1.36
## [79] bit64_4.0.5 fs_1.5.0 zip_2.2.0
## [82] nlme_3.1-153 xml2_1.3.2 compiler_4.1.2
## [85] rstudioapi_0.13 curl_4.3.2 ggsignif_0.6.3
## [88] reprex_2.0.1 statmod_1.4.36 statip_0.2.3
## [91] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [94] modeest_2.4.0 fBasics_3042.89.1 Matrix_1.3-4
## [97] nloptr_1.2.2.2 multtest_2.48.0 vctrs_0.3.8
## [100] pillar_1.6.4 lifecycle_1.0.1 rhdf5filters_1.4.0
## [103] jquerylib_0.1.4 data.table_1.14.2 bitops_1.0-7
## [106] stable_1.1.4 R6_2.5.1 rio_0.5.27
## [109] IRanges_2.26.0 codetools_0.2-18 boot_1.3-28
## [112] MASS_7.3-54 assertthat_0.2.1 rhdf5_2.36.0
## [115] withr_2.4.3 S4Vectors_0.30.2 GenomeInfoDbData_1.2.6
## [118] mgcv_1.8-38 parallel_4.1.2 hms_1.1.1
## [121] rpart_4.1-15 timeDate_3043.102 grid_4.1.2
## [124] speedyseq_0.5.3.9018 minqa_1.2.4 rmarkdown_2.11
## [127] carData_3.0-4 rmutil_1.1.5 Rtsne_0.15
## [130] ggpubr_0.4.0 numDeriv_2016.8-1.1 Biobase_2.52.0
## [133] lubridate_1.8.0